
# Bootstrap the session by executing the project's setup scripts in this
# namespace. NOTE(review): the rest of this file uses names it never imports
# (P_Lib, P_Data, P_GS_Data, P_Research, read_hdf, read_stata, merge,
# concat); presumably they are defined by these two scripts -- confirm.
# Both files are trusted local project code; exec() must never be pointed at
# anything user-supplied.
with open('init_path.py') as _init_src:  # close the handle deterministically
    exec(_init_src.read())
exec((P_Lib/'GasStation.py').read_text())

# The inline-plot magic only exists inside an IPython/Jupyter session.
# get_ipython is undefined under plain `python` and may return None when no
# interactive shell is active, so skip the magic in both cases.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None
if _ipython_shell is not None:
    _ipython_shell.run_line_magic('matplotlib', 'inline')


# In[2]:


# Calendar table: used to translate an integer date (intYMD) into the
# corresponding dtWeekStart value.
YMD = read_hdf(
    P_Research / 'Others' / 'Datetime' / 'utils_dates_YMD.h5',
    key='utils',
)
dict_intYMD_to_dtWeekStart = YMD.set_index('intYMD')['dtWeekStart'].to_dict()


# In[3]:



# Break-date table, one row per station. The two columns of the file are
# relabelled positionally: first the station id, then the break date.
sBreak = read_stata(P_Data / 'Processed' / 'adopter_measures_4week_e5.dta')
sBreak.columns = ['StID', 'bDate']

# Convenience lookups derived from the table.
ls_stid_with_break = sBreak['StID'].tolist()
dict_stid_to_breakdate = sBreak.set_index('StID')['bDate'].to_dict()

# Measures handled by the processing loop that follows.
markers = ['PCCount', 'PCResponse']

# In[5]:

# For every marker: build a weekly station panel around each station's break
# date, summarise the level before/after the break and how long it takes for
# the series to cross back over its pre-break mean, then store one summary
# row per station in <P_GS_Data>/sBreak/<marker>.h5 (key 'GS').
for marker in markers:

    # --- 1. Marker-specific settings ---------------------------------------
    # f                : daily input file
    # value_col        : column holding the daily value (renamed to 'V')
    # reversal_is_drop : True  -> a reversal is a week with V BELOW PreV
    #                    False -> a reversal is a week with V ABOVE PreV
    if marker == 'PCCount':
        f = P_GS_Data / 'PCCount' / 'daily_weekday_nonholiday-7to21-positive_price_only_e5.dta'
        value_col = marker
        reversal_is_drop = True
    elif marker == 'PCResponse':
        f = P_GS_Data / 'PCResponse' / 'rival_diffbrand_daily_weekday_nonholiday_e5.dta'
        value_col = 'Response'
        reversal_is_drop = False
    else:
        # NOTE(review): the original reversal logic also listed 'PCSize'
        # (upward direction) but never defined an input file for it, so it
        # could not run. Fail loudly instead of reusing stale data.
        raise ValueError(f'No input file configured for marker {marker!r}')

    # --- 2. Daily -> weekly averages per station ----------------------------
    # read_stata takes no HDF-style key; only the path is passed.
    df = read_stata(f).rename(columns={value_col: 'V'})
    df['W'] = df.YMD.map(dict_intYMD_to_dtWeekStart)  # convert day to week
    df = df.groupby(['StID', 'W']).V.mean().reset_index()  # weekly average

    # --- 3. Event time relative to the break date ---------------------------
    df = merge(df, sBreak)  # default inner merge: keeps stations with a break date
    # Whole weeks between the observation week and the break date
    # (astype(int) truncates toward zero).
    df['WDiff'] = ((df.W - df.bDate).dt.days / 7).astype(int)

    # --- 4. Pre/post-break averages (4-week windows, week 0 excluded) -------
    PreV = df[df.WDiff.between(-4, -1)].groupby('StID').V.mean()
    PreV.name = 'PreV'
    PostV = df[df.WDiff.between(1, 4)].groupby('StID').V.mean()
    PostV.name = 'PostV'

    # Attach the pre-break mean to every weekly row; stations without any
    # observation in the pre-break window are dropped by the inner merge.
    df = merge(df, PreV.reset_index())

    # 4-row trailing mean of V within each station. The window counts rows,
    # not calendar weeks, and is computed before the post-break filter, so
    # early post-break values still include pre-break weeks.
    df['MAV'] = df.groupby('StID').V.transform(lambda x: x.rolling(4).mean())

    # --- 5. First post-break week that crosses the pre-break mean -----------
    df = df[df.WDiff >= 1]
    if reversal_is_drop:
        reverted_raw = df.V < df.PreV
        reverted_ma = df.MAV < df.PreV
    else:
        reverted_raw = df.V > df.PreV
        reverted_ma = df.MAV > df.PreV
    # .first() relies on rows being ordered by week within each station,
    # which the sorted groupby in step 2 provides.
    FirstReversal = df[reverted_raw].groupby('StID').WDiff.first()
    FirstReversal.name = 'FirstReversal'
    FirstReversalMA = df[reverted_ma].groupby('StID').WDiff.first()
    FirstReversalMA.name = 'FirstReversalMA'

    # --- 6. One summary row per station -------------------------------------
    df = concat([PreV, PostV, FirstReversal, FirstReversalMA], axis=1)
    df['bSize'] = df.PostV - df.PreV        # absolute change at the break
    df['bSizePct'] = df.bSize / df.PreV     # change relative to the pre-break level

    # Stations that never cross back get the largest observed FirstReversal.
    # NOTE(review): FirstReversalMA is filled with the max of FirstReversal,
    # not of FirstReversalMA -- kept as in the original; confirm this is the
    # intended censoring value.
    v = df.FirstReversal.max()
    df['FirstReversal'] = df.FirstReversal.fillna(v)
    df['FirstReversalMA'] = df.FirstReversalMA.fillna(v)

    # --- 7. Persist ----------------------------------------------------------
    df.to_hdf(P_GS_Data / 'sBreak' / (marker+'.h5'), key='GS', mode='w', complevel=9, complib='blosc')

    
    

# PCCount: load the per-station break summary written by the loop above.
marker = 'PCCount'
PCCount = read_hdf(P_GS_Data / 'sBreak' / f'{marker}.h5', 'GS')

# Keep stations whose break size is above the 25th percentile and whose
# moving-average series takes at least 9 weeks to reverse.
s1 = PCCount[
    (PCCount['bSize'] > PCCount['bSize'].quantile(.25))
    & (PCCount['FirstReversalMA'] >= 9)
]
s1.shape  # notebook display only; has no effect when run as a script

# Export just the selected station ids.
s1.reset_index().loc[:, ['StID']].to_stata(
    P_GS_Data / 'sBreak' / 'big_breaks_pccount.dta',
    write_index=False,
)


# In[24]:


# PCSize: load the per-station break summary.
# NOTE(review): 'PCSize' is not in the `markers` list processed earlier in
# this file, so PCSize.h5 presumably comes from a previous run -- confirm it
# exists and is up to date.
marker = 'PCSize'
PCSize = read_hdf(P_GS_Data / 'sBreak' / f'{marker}.h5', 'GS')

# Keep stations whose break size is below the 75th percentile and whose
# moving-average series takes at least 9 weeks to reverse.
s2 = PCSize[
    (PCSize['bSize'] < PCSize['bSize'].quantile(.75))
    & (PCSize['FirstReversalMA'] >= 9)
]
s2.shape  # notebook display only; has no effect when run as a script

# Export just the selected station ids.
s2.reset_index().loc[:, ['StID']].to_stata(
    P_GS_Data / 'sBreak' / 'big_breaks_pcsize.dta',
    write_index=False,
)


# In[27]:


# PCResponse: load the per-station break summary written by the loop above.
marker = 'PCResponse'
PCResponse = read_hdf(P_GS_Data / 'sBreak' / f'{marker}.h5', 'GS')

# Keep stations with a negative break size whose moving-average series
# takes at least 9 weeks to reverse.
s3 = PCResponse[
    (PCResponse['bSize'] < 0)
    & (PCResponse['FirstReversalMA'] >= 9)
]
s3.shape  # notebook display only; has no effect when run as a script

# Export just the selected station ids.
s3.reset_index().loc[:, ['StID']].to_stata(
    P_GS_Data / 'sBreak' / 'big_breaks_pcresponse.dta',
    write_index=False,
)

